
## This code generates Figures 6 and 7 for ordinal IRT application to Asahi-Shimbun data
## varinf_3cat.R estimates variational model with Asahi-Shimbun data, generating out.VI3cat.1dim.PV0.RData
## "out.ordideal.1dimCons.PV0.RData" and "out.Gibbs3cat.1dim.PV0.RData" are output files from MCMC estimates
## in the Japan paper for validation
##
## Processing code here written by Yuki Shiraito, 10/1/2014
## Graphics code appended separately below here

setwd("~/Documents/working/japan")
rm(list = ls())

pv <- 0L  # 0:both, 1:politicians, 2:voters
n.dims <- 1L

### read in survey data
dat.all <- read.csv("dat_full.csv")[, -1]

### observations' attributes
obs.attri <- c("ken", "candidate", "dist", "year", "wave",
                "party", "chamber", "voter", "cities")
obs.attri <- dat.all[, names(dat.all) %in% obs.attri]

### choose observations
select.obs <- obs.attri$voter != 2 - pv
obs.attri <- obs.attri[select.obs, ]
dat.all <- dat.all[select.obs, !(names(dat.all) %in% names(obs.attri))]

### Missing Values
dat.all[dat.all == 99 | dat.all == 9 | dat.all == 66] <- NA

### drop NA questions
num.na <- apply(dat.all, 2, function(x) {sum(is.na(x))})
dat.all <- dat.all[, num.na != nrow(dat.all)]

## drop NA observations
num.na <- apply(dat.all, 1, function(x) {sum(is.na(x))})
obs.attri <- obs.attri[num.na != ncol(dat.all), ]
obs.ind <- 1:nrow(obs.attri)

rm(dat.all, num.na, select.obs)

## party ids for each wave
id.ldp <- c(rep(1, 7), rep(2, 2), 1)
id.dpj <- c(rep(2, 7), rep(1, 2), 2)
id.cgp <- c(rep(3, 8), 4, 4)
id.jcp <- c(rep(4, 8), 6, 6)
id.mirai <- c(rep(NA, 8), 3, 7) # after 2012 (Mirai in 2012, Seikatsu in 2013)
id.yp <- c(rep(NA, 6), 7, 7, 7, 5) # after 2009
id.jrp <- c(rep(NA, 8), 5, 3) # after 2012

## years of surveys
all.year <- c("2003", "2004", "2005", "2007", "2008", "2009", "2010", "2012",
              "2013")
lh.year <- c("2003", "2005", "2008", "2009", "2012")
uh.year <- c("2004", "2007", "2010", "2013")

## item names
dat.all <- read.csv("dat_full.csv")[, -1]
### preprocess data
source("prep.analysis.R", echo=TRUE)
### drop items with 2 response categories
drop.items <- apply(dat.all, 2, function(x){length(sort(unique(x)))}) == 2
dat.all <- dat.all[, !drop.items]
name.items <- colnames(dat.all)
rm(dat.all)

### load posterior sample
load(paste("out.VI3cat.", n.dims, "dim.PV", pv, ".RData", sep = ""))
colnames(out.varinf$means$x) <- "d1"
rownames(out.varinf$means$Delta) <- name.items
rownames(out.varinf$means$beta) <- name.items
rownames(out.varinf$means$tau) <- name.items

### load posterior sample (5 cat)
library(coda)
load(paste("out.ordideal.1dimCons.PV", pv, ".RData", sep = ""))

### combine two chains
extract.iter <- ((nrow(ordideal.out.list[[1]]$Lambda) / 2)
                 + 1):nrow(ordideal.out.list[[1]]$Lambda)
post.Lambda <- NULL
post.ideal <- NULL
post.tau <- NULL
for (i in 1:2) { 
    post.Lambda <- rbind(post.Lambda, ordideal.out.list[[i]]$Lambda[extract.iter, ])
    post.ideal <- rbind(post.ideal, ordideal.out.list[[i]]$ideal[extract.iter, ])
    post.tau <- rbind(post.tau, ordideal.out.list[[i]]$tau[extract.iter, ])
}
rm(ordideal.out.list)

alpha <- post.Lambda[, grep("[.]0", colnames(post.Lambda))]
alpha <- apply(alpha, 2, mean)
beta <- post.Lambda[, grep("[.]1", colnames(post.Lambda))]
beta <- apply(beta, 2, mean)
ideal5 <- apply(post.ideal, 2, mean)
ideal5 <- (ideal5 - mean(ideal5))/sd(ideal5)



#### VI PLOTS: 5 CAT
cor(ideal5, out.varinf$means$x)
cor(ideal5[obs.attri$voter==1], out.varinf$means$x[obs.attri$voter==1])
cor(ideal5[obs.attri$voter==0], out.varinf$means$x[obs.attri$voter==0])

## For 5 category results
# r=0.816 for voters, r=0.958 for legislators, r=0.913 overall

### Figure 6, right panel
pdf("legis5cat.pdf")
plot(ideal5[obs.attri$voter==0], out.varinf$means$x[obs.attri$voter==0], 
     xlab = "MCMC Estimate (5 category)", ylab = "EM Estimate (3 category)",
	 main="5 category MCMC",  xlim=c(-4,4), ylim=c(-4,4), type="n", bty="n",
	 cex.lab=1.3, cex.axis=1.5, cex.main=2)
abline(a=0, b=1, lwd=4, col="grey")
points(-ideal5[obs.attri$voter==0], out.varinf$means$x[obs.attri$voter==0], 
	pch=16, cex=0.7) 
text(3,-2,expression(paste(rho," = 0.96")), cex=2)
dev.off()



### load posterior sample (3 cat)
load("out.Gibbs3cat.1dim.PV0.RData")

### combine two chains
extract.iter <- ((nrow(ordideal.out.list[[1]]$Lambda) / 2)
                 + 1):nrow(ordideal.out.list[[1]]$Lambda)
post.Lambda <- NULL
post.ideal <- NULL
post.tau <- NULL
for (i in 1:2) { 
    post.Lambda <- rbind(post.Lambda, ordideal.out.list[[i]]$Lambda[extract.iter, ])
    post.ideal <- rbind(post.ideal, ordideal.out.list[[i]]$ideal[extract.iter, ])
    post.tau <- rbind(post.tau, ordideal.out.list[[i]]$tau[extract.iter, ])
}
rm(ordideal.out.list)

alpha <- post.Lambda[, grep("[.]0", colnames(post.Lambda))]
alpha <- apply(alpha, 2, mean)
beta <- post.Lambda[, grep("[.]1", colnames(post.Lambda))]
beta <- apply(beta, 2, mean)
ideal3 <- apply(post.ideal, 2, mean)
ideal3 <- (ideal3 - mean(ideal3))/sd(ideal3)



cor(ideal3, out.varinf$means$x)
cor(ideal3[obs.attri$voter==1], out.varinf$means$x[obs.attri$voter==1])
cor(ideal3[obs.attri$voter==0], out.varinf$means$x[obs.attri$voter==0])

## For 3 category results
# r=0.929 for voters, r=0.974 for legislators, r=0.959 overall

### Figure 6, left panel
pdf("legis3cat.pdf")
plot(out.varinf$means$x[obs.attri$voter==0], ideal3[obs.attri$voter==0], 
     xlab = "MCMC Estimate (3 category)", ylab = "EM Estimate (3 category)",
	 main="3 category MCMC", xlim=c(-4,4), ylim=c(-4,4), type="n", bty="n",
	 cex.lab=1.5, cex.axis=1.5, cex.main=2)
abline(a=0, b=1, lwd=4, col="grey")
points(-ideal3[obs.attri$voter==0], out.varinf$means$x[obs.attri$voter==0], 
	pch=16, cex=0.7) 
text(3,-2,expression(paste(rho," = 0.97")), cex=2)
dev.off()


### Figure 7
pdf("japan_boxplot.pdf")
plot(0, 0, type="n", ylim=c(-4,4.5), xlim=c(1,28), ylab="Estimate", xaxt="n", xlab="Year",
	cex.lab=1.2)
axis(1, at=c(2,7,12,17,22,27), labels=c("2003","2007","2009","2010","2012","2013"))

legend(1.0,4.7, c("MCMC 5 category","MCMC 3 category","EM 3 category"), fill=c("gray40","gray76","white"),
horiz=TRUE, bty="n")

boxplot(ideal5[obs.attri$voter==1 & obs.attri$year==2003], col="gray40", at=1, add=TRUE)
boxplot(ideal5[obs.attri$voter==1 & obs.attri$year==2007], col="gray40", at=6, add=TRUE)
boxplot(ideal5[obs.attri$voter==1 & obs.attri$year==2009], col="gray40", at=11, add=TRUE)
boxplot(ideal5[obs.attri$voter==1 & obs.attri$year==2010], col="gray40", at=16, add=TRUE)
boxplot(ideal5[obs.attri$voter==1 & obs.attri$year==2012], col="gray40", at=21, add=TRUE)
boxplot(ideal5[obs.attri$voter==1 & obs.attri$year==2013], col="gray40", at=26, add=TRUE)

boxplot(ideal3[obs.attri$voter==1 & obs.attri$year==2003], col="gray76", at=2, add=TRUE)
boxplot(ideal3[obs.attri$voter==1 & obs.attri$year==2007], col="gray76", at=7, add=TRUE)
boxplot(ideal3[obs.attri$voter==1 & obs.attri$year==2009], col="gray76", at=12, add=TRUE)
boxplot(ideal3[obs.attri$voter==1 & obs.attri$year==2010], col="gray76", at=17, add=TRUE)
boxplot(ideal3[obs.attri$voter==1 & obs.attri$year==2012], col="gray76", at=22, add=TRUE)
boxplot(ideal3[obs.attri$voter==1 & obs.attri$year==2013], col="gray76", at=27, add=TRUE)

boxplot(out.varinf$means$x[obs.attri$voter==1 & obs.attri$year==2003], col="white", at=3, add=TRUE)
boxplot(out.varinf$means$x[obs.attri$voter==1 & obs.attri$year==2007], col="white", at=8, add=TRUE)
boxplot(out.varinf$means$x[obs.attri$voter==1 & obs.attri$year==2009], col="white", at=13, add=TRUE)
boxplot(out.varinf$means$x[obs.attri$voter==1 & obs.attri$year==2010], col="white", at=18, add=TRUE)
boxplot(out.varinf$means$x[obs.attri$voter==1 & obs.attri$year==2012], col="white", at=23, add=TRUE)
boxplot(out.varinf$means$x[obs.attri$voter==1 & obs.attri$year==2013], col="white", at=28, add=TRUE)
dev.off()

